\[\\[.4in]\]

library(dplyr)
library(ggplot2)
library(lubridate)
library(MMWRweek)
library(readr)
library(rlang)
library(magrittr)
library(aws.s3)
library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(plotly)
library(readr)
library(stringr)
library(tidyr)
suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
forecast_date <- as.Date("2024-11-20")
epi_data <- readr::read_csv("https://data.cdc.gov/resource/ua7e-t2fy.csv?$limit=20000&$select=weekendingdate,jurisdiction,totalconfflunewadm")
epi_data <- epi_data %>%
  mutate(
    epiweek = epiweek(weekendingdate),
    epiyear = epiyear(weekendingdate)
  ) %>%
  left_join(
    (.) %>%
      distinct(epiweek, epiyear) %>%
      mutate(
        season = convert_epiweek_to_season(epiyear, epiweek),
        season_week = convert_epiweek_to_season_week(epiyear, epiweek)
      ),
    by = c("epiweek", "epiyear")
  )
epi_data <- epi_data %>%
  mutate(time_value = as.Date(weekendingdate), geo_value = tolower(jurisdiction), nhsn = totalconfflunewadm) %>%
  select(-weekendingdate, -jurisdiction, -totalconfflunewadm)
ahead <- 0
window_size <- 3
recent_window <- 3
geos_to_plot <- c("mn", "ca", "pa", "usa", "ri") #epi_data %>% pull(geo_value) %>% unique() %>% setdiff(c("as", "mp", "vi")) # 
all_geos_to_plot <- epi_data %>% pull(geo_value) %>% unique() %>% setdiff(c("as", "mp", "vi"))
plot_quantiles <- c(0.6, 0.75, 0.95, 0.99)
quantile_alphas <- c(0.9, 0.6, 0.4, 0.3)
min_plot_quantiles <- c(0.75, 0.90)
min_quantile_alphas <- c(0.9, 0.3)

The model used is specified here. For flu, the data for the 2020/21 and 2021/22 seasons are dropped, while for covid we’ve done versions both including and excluding.

Comparing a couple of methods

The intervals here are the 75th and 90th quantiles (more quantiles uses 60th, 75, 95, and 99th). Some notes:

quantile_basic <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead)) %>%
    bind_rows() %>%
    mutate(geo_type = "state", forecaster = "quantile basic")

quantile_7 <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead, quant_type = 7)) %>%
    bind_rows() %>%
    mutate(geo_type = "state", forecaster = "quantile_7") 

epipred_extrapolation <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead, quantile_method = "epipredict")) %>% bind_rows() %>%
    mutate(geo_type = "state", forecaster = "epipred quantile") 

geo_aggregate <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead, geo_agg = TRUE)) %>% bind_rows() %>%
    mutate(geo_type = "state", forecaster = "geo aggregated")

all_results <-  bind_rows(
  geo_aggregate,
  quantile_basic,
  epipred_extrapolation
)

Small set of states

small_truth_data <- epi_data %>%
  mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
  filter(geo_value %in% geos_to_plot, time_value > "2022-09-01")
the_plot <-
  all_results %>%
  filter(geo_value %in% geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Small set of states, more quantiles

small_truth_data <- epi_data %>%
  mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
  filter(geo_value %in% geos_to_plot, time_value > "2022-09-01")
the_plot <-
  all_results %>%
  filter(geo_value %in% geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Full set of states

full_truth_data <- epi_data %>%
  mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
  filter(geo_value %in% all_geos_to_plot, time_value > "2022-09-01")
the_plot <-
  all_results %>%
  filter(geo_value %in% all_geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 5000, width = 1700) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Full set of states, more quantiles

full_truth_data <- epi_data %>%
  mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
  filter(geo_value %in% all_geos_to_plot, time_value > "2022-09-01")
the_plot <-
  all_results %>%
  filter(geo_value %in% all_geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 5000, width = 1700) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Mean Averaging

These are * the mean of the geo aggregated and the epipredict quantile above * the mean of the geo aggregated and the quantile basic above * the mean of all 3

Small set of states

the_plot <- mean_climatological %>%
  filter(geo_value %in% geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Small set of states, more quantiles

the_plot <- mean_climatological %>%
  filter(geo_value %in% geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Full set of states

the_plot <- mean_climatological %>%
  filter(geo_value %in% all_geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Full set of states, more quantiles

the_plot <- mean_climatological %>%
  filter(geo_value %in% all_geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Geo Mean Averaging

Since the dynamics are exponential, averaging on the log scale is probably a better method. That way, if the national is predicting something an order of magnitude higher that won’t swamp the local information. This is the same set using the geomean instead of the arithmetic mean.

Small set of states

the_plot <- mean_climatological %>%
  filter(geo_value %in% geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Small set of states, more quantiles

the_plot <- mean_climatological %>%
  filter(geo_value %in% geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Full set of states

the_plot <- mean_climatological %>%
  filter(geo_value %in% all_geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))

Full set of states more quantiles

the_plot <- mean_climatological %>%
  filter(geo_value %in% all_geos_to_plot) %>%
  plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)

ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
  layout(hoverlabel = list(bgcolor = "white"))